// Part 1: build s_ij
// For every journal (issn), compute the share of its rows that belongs to each APE sector.
//   Input   : ${data}/Source/raw_ape_issn
//   Output  : ${tmp}/matrix_ape_issn, one row per (ape, issn) with
//             nb_papers_ape (row count) and share_ape_issn (row count / issn total)
//   Requires: egenmore (nvals), gtools (gcollapse); globals data, tmp and MIN_DOI
use "${data}/Source/raw_ape_issn", clear

	/* try to correct the NAF with the Apet info in DADS (which is used in the outcome variables) */
	// Two-step lookup on siren: the consolidated file is merged first; the second merge
	// uses -update-, so it only fills values that are still missing after the first one.
	// keep(1 3 [4 5]) discards sirens that exist only in the lookup files.
	// NOTE(review): the directory was spelled "Utils" in one merge and "utils" in the other,
	// which cannot both resolve on a case-sensitive filesystem; unified to "Utils" here —
	// confirm the on-disk name.
	merge m:1 siren using "${data}/Utils/SirenApet_maineng_consolidate", nogen keep(1 3)
	merge m:1 siren using "${data}/Utils/SirenApet_maineng_noconsolidate", nogen update keep(1 3 4 5)
	// A non-missing apet overrides the ape code that came with the raw data.
	replace ape = apet if !mi(apet)
	drop apet
/*
	
	merge m:1 siren using "${intpath}/siren_apet_stable", keep( 3) nogen
	replace apet = apet_stable if apet_stable != ""
	drop apet_stable
*/
	
	//patch for main missing APE
	// Manual overrides, applied after the lookup above so they take precedence.
	// NOTE(review): these equality tests (and the merges on siren) are only reliable if
	// siren is stored as long or double; 9-digit identifiers are not exactly representable
	// in a float — confirm the storage type.
	replace ape = "2120Z" if siren == 732059332 //SANOFI
	replace ape = "2016Z" if siren == 445115611 //ARKEMA
	replace ape = "2120Z" if siren == 306094053 //MODAX
	replace ape = "2630Z" if siren == 307593129 //MITSUBISHI ELECTRIC TELECOM
	replace ape = "2120Z" if siren == 310635248 //CELLTECH PHARMA
	replace ape = "2670Z" if siren == 314899972 //NETTEST PHOTONICS
	replace ape = "0113Z" if siren == 321628950 //LIMAGRAIN GENETICS
	replace ape = "8299Z" if siren == 322593104 //SETVAL
	replace ape = "7112B" if siren == 324502194 //SORAPEC
	replace ape = "2651B" if siren == 327665865 //SINTERS
	replace ape = "3220C" if siren == 340818921 //SEDIA
	replace ape = "6110Z" if siren == 380129866 //ORANGE
	// Fold siren 508689189 into 775685019 before assigning the CEA code to the latter.
	replace siren = 775685019 if siren == 508689189
	replace ape = "7219Z" if siren == 775685019 //CEA

	// Fail with a readable message instead of a bare syntax error when the threshold is unset.
	if "${MIN_DOI}" == "" {
		display as error "global MIN_DOI must be set before building matrix_ape_issn"
		exit 198
	}

	// Drop APE sectors with fewer than MIN_DOI distinct DOIs (nvals() counts distinct values).
	bys ape: egen nb_doi = nvals(doi)
	drop if nb_doi < ${MIN_DOI}

	// Count rows per (ape, issn); every remaining input row contributes one.
	gen nb_papers = 1
	ren journal_issn issn
	gcollapse (sum) nb_papers, by(ape issn)

	// Denominator is the issn total over the retained sectors only, so shares sum to 1 within issn.
	bys issn: egen nb_papers_tot = total(nb_papers)
	gen share_ape_issn = nb_papers / nb_papers_tot

	// The data are already unique on (ape, issn) after gcollapse, so no de-duplication is needed.
	keep ape issn share_ape_issn nb_papers
	rename nb_papers nb_papers_ape
	label data "Contains share of each APE sector in the citations of an ISSN"
save "${tmp}/matrix_ape_issn", replace

// Part 2: build s_lj
// For every Labex (labexid), compute the share of its de-duplicated rows that falls in each journal (issn).
//   Inputs  : ${data}/Source/main_labex (labexid -> correspondance lookup)
//             ${data}/Source/raw_issn_labex_grobid
//   Output  : ${tmp}/matrix_issn_labex, one row per (labexid, issn) with
//             nb_papers_labex, nb_papers_tot and share_labex_issn
//   Requires: gtools (gcollapse, gduplicates); globals data and tmp

// Stash the labexid -> correspondance lookup in a tempfile for the merge below.
tempfile corresp_labex
use labexid correspondance using "${data}/Source/main_labex", clear
save `corresp_labex'

use "${data}/Source/raw_issn_labex_grobid", clear
	*drop if inlist(labexid, "i2", "i41") // no information on these
	merge m:1 labexid using `corresp_labex', nogen keep(1 3)
	// replace first submissions for resubmissions in 2nd wave (starting with i)
	replace labexid = correspondance if substr(correspondance, 1, 1) == "i"
	drop correspondance

	// Discard rows whose file name contains "bib" or "pub".
	drop if regexm(file, "bib") | regexm(file, "pub")

	keep doi* *issn* labexid
	// Fall back on journal_issn when issn is missing.
	replace issn = journal_issn if mi(issn)

	// NOTE(review): journal_issn is still in the data here, so two rows that agree on
	// doi/issn/labexid but differ in journal_issn both survive and are both counted below —
	// confirm this is intended. Rows whose issn is still missing form their own group and
	// enter the per-Labex total.
	gduplicates drop

	// Count the remaining rows per (labexid, issn).
	gen nb_papers = 1
	gcollapse (sum) nb_papers, by(labexid issn)

	// Shares sum to 1 within each labexid.
	bys labexid: egen nb_papers_tot = total(nb_papers)
	gen share_labex_issn = nb_papers / nb_papers_tot
	label data "Contains share of each ISSN in the citations of a Labex"
	rename nb_papers nb_papers_labex
save "${tmp}/matrix_issn_labex", replace
